Basic Imports
import geopandas as gpd
import pandas as pd
import dask.dataframe as dd
import numpy as np
from shapely.geometry import Polygon, MultiPolygon
from IPython.display import display, display_html
%matplotlib inline
def display_side_by_side(*args):
    """Display several DataFrames next to each other in one notebook cell.

    Each positional argument is a DataFrame; its HTML table is switched to
    ``display:inline`` so the tables flow side by side instead of stacking.
    """
    # Join the individual tables; str.join avoids the quadratic += loop.
    html_str = " ".join(df.to_html() for df in args)
    # Patch only the opening tag: replacing the bare substring 'table' would
    # also mangle every closing '</table>' tag into invalid markup.
    display_html(html_str.replace('<table', '<table style="display:inline"'), raw=True)
To download all necessary data for this notebook, please run the following cell:
import os
import requests
import os
from multiprocessing.dummy import Pool
from functools import partial
import zipfile
def download_file(url, export_directory="", filename=None):
    """Download *url* into *export_directory* and return the local file path.

    Parameters
    ----------
    url : str
        Source URL; its last path component is used as the file name when
        *filename* is not given.
    export_directory : str
        Target directory (must already exist).
    filename : str, optional
        Explicit file name to save under.

    Returns
    -------
    str
        Path of the downloaded file.
    """
    print("Download file %s to directory %s"%(url, export_directory))
    if filename is None:  # 'is None', not '== None'
        local_filename = os.path.join(export_directory, url.split('/')[-1])
    else:
        local_filename = os.path.join(export_directory, filename)
    # stream=True downloads chunk-wise instead of loading the whole body into
    # memory; the context manager releases the connection even on errors.
    with requests.get(url, stream=True) as r:
        # Fail loudly on HTTP errors instead of silently saving an error page:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename
#Create Folders for Data (makedirs with exist_ok replaces the explicit checks):
os.makedirs("Data", exist_ok=True)
os.makedirs(os.path.join("Data", "New York Taxi"), exist_ok=True)
os.makedirs(os.path.join("Data", "OSM GPX"), exist_ok=True)
#Download New York Taxi Data for Yellow Cabs in 2017:
download_taxi_data = partial(download_file, export_directory=os.path.join("Data", "New York Taxi"))
request_strings = [r"https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2017-%02d.csv" % i for i in range(1, 13)]
#multiprocessing.dummy.Pool is a thread pool -- appropriate for I/O-bound
#downloads. Reuse a single pool for both fleets and close it afterwards
#(the original created two pools and never closed either of them):
pool = Pool(processes=4)
pool.map(download_taxi_data, request_strings)
#Download New York Taxi Data for Green Cabs in 2017:
request_strings = [r"https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2017-%02d.csv" % i for i in range(1, 13)]
pool.map(download_taxi_data, request_strings)
pool.close()
pool.join()
#Download shapefile of New York Taxi Zones:
print("Download Taxi Zones Shapefiles.")
zip_path = download_taxi_data(r"https://s3.amazonaws.com/nyc-tlc/misc/taxi_zones.zip")
print("Unzip Taxi Zones Shapefiles.")
with zipfile.ZipFile(zip_path, "r") as z:
    z.extractall(os.path.dirname(zip_path))
os.remove(zip_path)
#Download OSM path data:
osm_path = download_file(r"https://ptv2box.ptvgroup.com/index.php/s/9sTUmxdF80NU2nr/download", export_directory=os.path.join("Data", "OSM GPX"), filename="OSM_GPX.parquet")
In this section, we will load and process the shapefile "taxi_zones.shp" containing the New York Taxi Zones, such that we can use them as a basis for the Bokeh plot. In the next cell, we use GeoPandas to load the shapefile of taxi zones and transform the coordinate system to Web Mercator (EPSG code of the Web Mercator projection = 3785):
#Load the taxi-zone shapefile; os.path.join keeps the path portable across
#operating systems (the original raw backslash string was Windows-only):
df_taxizones = gpd.read_file(os.path.join("Data", "New York Taxi", "taxi_zones.shp"))
#Reproject to Web Mercator (EPSG:3785); Bokeh tile layers expect Mercator coordinates.
df_taxizones.to_crs(epsg=3785, inplace=True)
display(df_taxizones.head())
print("Number of Polygons: %d"%len(df_taxizones))
df_taxizones.plot()
Simplify the shapes of the zones (otherwise the plot performance is slow):
#Simplify each polygon with a tolerance of 100 map units (metres, since the
#frame was reprojected to Web Mercator above); fewer vertices keeps the
#Bokeh patches renderer responsive.
df_taxizones["geometry"] = df_taxizones["geometry"].simplify(100)
Convert the Shapely polygons to X and Y arrays with the corresponding coordinates. Take into account multipolygons and separate them into individual shapes:
#Flatten every zone geometry into integer X/Y coordinate lists for Bokeh.
data = []
for zonename, LocationID, borough, shape in df_taxizones[["zone", "LocationID", "borough", "geometry"]].values:
    #If shape is a Polygon, extract the X and Y coordinates of its boundary line:
    if isinstance(shape, Polygon):
        X, Y = shape.boundary.xy
        X = [int(x) for x in X]
        Y = [int(y) for y in Y]
        data.append([LocationID, zonename, borough, X, Y])
    #If shape is a MultiPolygon, extract the coordinates of each sub-polygon.
    #Iterate over .geoms: iterating a MultiPolygon directly is deprecated and
    #was removed in Shapely 2.0.
    elif isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            X, Y = poly.boundary.xy
            X = [int(x) for x in X]
            Y = [int(y) for y in Y]
            data.append([LocationID, zonename, borough, X, Y])
#Create a new DataFrame with the X and Y coordinates separated:
df_taxizones = pd.DataFrame(data, columns=["LocationID", "ZoneName", "Borough", "X", "Y"])
display(df_taxizones.head())
print("Number of Polygons: %d"%len(df_taxizones))
Here, we read in the data from the New York taxis and aggregate them to show us information about how frequently each taxi zone is visited. We will use Dask.DataFrame, such that the whole dataset can be loaded at once without blowing up our memory and to get a nice speedup due to the parallelism of Dask. Let us start a Dask Client and Local Cluster (after the execution of the cell, click on the Dashboard link to view the Dask Dashboard, where you can see the resource consumption of our computation).
#Start a local Dask cluster and connect a client to it.
from dask.distributed import Client, LocalCluster
import dask.dataframe as dd
from dask import compute
cluster = LocalCluster()  # spawns worker processes on this machine with Dask defaults
print(cluster)
client = Client(cluster)
#Last expression of the cell: displays the client widget (incl. dashboard link) in the notebook.
client
Using Dask, we now read in the New York Taxi Data for the yellow and green cabs into distributed DataFrames. Note: A Dask.DataFrame is a delayed object and to calculate results, one has to trigger the computation via the .compute() method.
#Read the yellow-cab CSVs into one distributed DataFrame. Forward slashes keep
#the glob pattern portable between Windows and POSIX (the original raw
#backslash paths were Windows-only):
df_taxis_yellow = dd.read_csv("Data/New York Taxi/yellow_tripdata_2017-*.csv",
                              usecols=["tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count",
                                       "PULocationID", "DOLocationID"],
                              parse_dates=["tpep_pickup_datetime", "tpep_dropoff_datetime"])
#Green cabs use the 'lpep_' column prefix; rename so both fleets share one schema:
df_taxis_green = dd.read_csv("Data/New York Taxi/green_tripdata_2017-*.csv",
                             usecols=["lpep_pickup_datetime", "lpep_dropoff_datetime", "passenger_count",
                                      "PULocationID", "DOLocationID"],
                             parse_dates=["lpep_pickup_datetime", "lpep_dropoff_datetime"]).rename(
    columns={"lpep_pickup_datetime": "tpep_pickup_datetime",
             "lpep_dropoff_datetime": "tpep_dropoff_datetime"})
df_taxis = dd.concat([df_taxis_yellow, df_taxis_green])
#Keep only trips whose pickup and dropoff both lie in 2017:
df_taxis = df_taxis[(df_taxis["tpep_pickup_datetime"].dt.year == 2017)&(df_taxis["tpep_dropoff_datetime"].dt.year == 2017)]
df_taxis.head()
Now, we calculate the number of boarding and alighting passengers for each zone and every hour and every day using the GroupBy method. For this, we first create columns specifying the hour and weekday of Pickup and Dropoff. Then, we define the groupby operations for Pickups and Dropoffs, and finally we trigger the parallelized computation using dask.compute:
#Hour of day for pickups and dropoffs:
df_taxis["Pickup_Hour"] = df_taxis["tpep_pickup_datetime"].dt.hour
df_taxis["Dropoff_Hour"] = df_taxis["tpep_dropoff_datetime"].dt.hour
#Weekday (0=Monday) taken from the matching timestamp: pickups must be grouped
#by the pickup weekday, not the dropoff weekday (the original used the dropoff
#weekday for both, which misclassifies trips that span midnight):
df_taxis["Pickup_Weekday"] = df_taxis["tpep_pickup_datetime"].dt.weekday
df_taxis["Dropoff_Weekday"] = df_taxis["tpep_dropoff_datetime"].dt.weekday
#Total passengers per (hour, weekday, zone):
pickups = df_taxis.groupby(by=["Pickup_Hour", "Pickup_Weekday", "PULocationID"])["passenger_count"].sum()
dropoffs = df_taxis.groupby(by=["Dropoff_Hour", "Dropoff_Weekday", "DOLocationID"])["passenger_count"].sum()
#Trigger the parallel computation of both aggregations at once:
pickups, dropoffs = compute(pickups, dropoffs)
Time Series of Pickups and Dropoffs
Aggregate Pickups and Dropoffs hourly:
#Total passengers per hour of day, summed over all weekdays and zones.
hourly_pickups = pickups.groupby(level=0).sum()
hourly_dropoffs = dropoffs.groupby(level=0).sum()
df_pudo = hourly_pickups.to_frame()
df_pudo["Dropoff"] = hourly_dropoffs  # aligned on the hour index
df_pudo.columns = ["P", "D"]
df_pudo.index.rename("Hour", inplace=True)
df_pudo.head(3)
Plot with Holoviews (Backend: Bokeh)
import holoviews as hv
hv.extension('bokeh')
%%opts Bars.Grouped [group_index='Group' toolbar='above' tools=['hover'] width=800]
#The cell magic above styles Bars elements labelled 'Grouped' (hover tool, width 800).
from itertools import product
#One bar per (hour, P/D) combination; look the passenger count up in df_pudo:
hours, groups = df_pudo.index.values, ['P', 'D']
keys = product(hours, groups)
bars = hv.Bars([(hour, pudo, df_pudo.loc[hour, pudo]) for hour, pudo in keys],
               ['Hour', "Group"], "Passengers")
#Relabel so the Bars.Grouped options from the magic apply to this element:
bars.relabel(group='Grouped')
Finally, join the Taxi Zones DataFrame with the information about the Pickups and Dropoffs for every hour and weekday:
#Preview the aggregated pickups next to the zone table before joining:
display_side_by_side(pd.DataFrame(pickups).head(), df_taxizones[["LocationID", "ZoneName", "X"]].head())
pickups = pd.DataFrame(pickups)
dropoffs = pd.DataFrame(dropoffs)
#For every (weekday, hour) combination, attach one pickup column and one
#dropoff column to the zone table (7 * 24 * 2 = 336 new columns in total):
for hour in range(24):
    for weekday in range(7):
        #Get pickups and dropoffs for this hour and weekday:
        p = pd.DataFrame(pickups.loc[(hour, weekday)]).reset_index().rename(columns={"PULocationID" : "LocationID"})
        d = pd.DataFrame(dropoffs.loc[(hour, weekday)]).reset_index().rename(columns={"DOLocationID" : "LocationID"})
        #Add information of pickups and dropoffs to the New York Taxi Zone
        #DataFrame; zones without trips in this slot get 0 via fillna:
        df_taxizones = pd.merge(df_taxizones, p, on="LocationID", how="left").fillna(0)
        df_taxizones.rename(columns={"passenger_count" : "PU_Passenger_%d_%d"%(weekday, hour)}, inplace=True)
        df_taxizones = pd.merge(df_taxizones, d, on="LocationID", how="left").fillna(0)
        df_taxizones.rename(columns={"passenger_count" : "DO_Passenger_%d_%d"%(weekday, hour)}, inplace=True)
df_taxizones.head(2)
Bokeh Imports
from bokeh.io import output_notebook, output_file, show
from bokeh.plotting import figure
from bokeh.models import HoverTool, Select, ColumnDataSource, WheelZoomTool, LogColorMapper, LinearColorMapper
#OrRd9: 9-step orange-red palette used for the choropleth colour scale below.
from bokeh.palettes import OrRd9 as palette
output_notebook()  # render Bokeh plots inline in the notebook
Define the source for the plot. Bokeh, like its high-level API Holoviews, can convert dicts and DataFrames to a ColumnDataSource. Its columns can then be used to specify what should be plotted.
#Initial view: pickups on Monday (weekday 0) at 7:00. The "Passengers" column
#is later overwritten by the JS widget callback to switch the displayed slot.
df_taxizones["Passengers"] = df_taxizones["PU_Passenger_0_7"]
source = ColumnDataSource(df_taxizones)
Define Colormapper for zones
#Global colour scale: the maximum passenger count over all PU_/DO_ hour columns.
#Use a concrete list for column selection -- indexing a DataFrame with a lazy
#filter object is fragile/not supported across pandas versions:
passenger_columns = [c for c in df_taxizones.columns if "Passenger_" in c]
max_passengers_per_hour = df_taxizones[passenger_columns].max().max()
color_mapper = LinearColorMapper(palette=palette[::-1], high=max_passengers_per_hour, low=0)
Define Figure
#Map figure; wheel zoom is active by default so the map feels like a web map.
p = figure(title="Titel",
           plot_width=900, plot_height=450,
           toolbar_location=None,
           tools="pan,wheel_zoom,box_zoom,reset,save",
           active_scroll="wheel_zoom")
#Hide the axes -- the background map tiles provide the spatial context:
p.xaxis.visible = False
p.yaxis.visible = False
#Get rid of zoom on axes (isinstance is the idiomatic type check):
for t in p.tools:
    if isinstance(t, WheelZoomTool):
        t.zoom_on_axis = False
Add Background Map (Custom Tile-Maps: http://geo.holoviews.org/Working_with_Bokeh.html)
from bokeh.models import WMTSTileSource
#Use OpenStreetMap tiles (standard Z/X/Y slippy-map URL template):
tiles = WMTSTileSource(url='http://c.tile.openstreetmap.org/{Z}/{X}/{Y}.png')
#Add the tile layer and lower its alpha so the coloured zones stay readable:
tile_layer = p.add_tile(tiles)
tile_layer.alpha = 0.6
Draw the Taxi Zone polygons on the map. Pass the ColumnDataSource as source, so that you can use the column names to pass data to the renderer. We use the Passengers column to draw a choropleth map.
#Draw the zone polygons. "X", "Y" and "Passengers" are columns of the
#ColumnDataSource; the colour mapper turns passenger counts into fill colours.
patches = p.patches(xs="X", ys="Y", source=source,
                    fill_color={'field': 'Passengers', 'transform': color_mapper},
                    line_color="black", alpha=0.5)
show(p)
Add the Hovertool to show data of each zone (the attributes of the selected zone can be accessed by the @ key):
#Add Hover Tool: "@Passengers" is substituted with the hovered zone's value.
hovertool = HoverTool(tooltips=[("Passengers:", "@Passengers")])
p.add_tools(hovertool)
show(p)
Add more advanced Hover Tools via HTML:
#Add Hovertool via HTML: the tooltips string is a complete HTML snippet;
#Bokeh replaces each @column reference with the hovered glyph's data.
hovertool = HoverTool(tooltips="""
<head>
<style>
#demo {
background-color: rgba(255,255,255, .9);
-moz-box-shadow: inset 0 0 15px 5px rgba(200,200,200, .9);
-webkit-box-shadow: inset 0 0 15px 5px rgba(200,200,200, .9);
box-shadow: inset 0 0 15px 5px rgba(200,200,200, .9);
border-radius: 5px;
-moz-border-radius: 5px;
-webkit-border-radius: 5px;
}
</style>
</head>
<div id="demo">
<h2 style="text-align: center;">@ZoneName</h2>
<h3 style="text-align: center;">@Borough</h3>
<h4 style="text-align: center;">@Passengers Passengers</h4>
<p><img style="width: 100px; height: auto; display: block; margin-left: auto; margin-right: auto;"src="https://pbs.twimg.com/profile_images/426051037577760768/2d7K9aJc_400x400.jpeg" alt="Country Flag" /></p>
</div>""")
p.add_tools(hovertool)
show(p)
Add Slider widget for selecting the hour of the day:
#Slider selecting the hour of day (0-23), starting at 7 to match the initial view:
from bokeh.models.widgets import Slider
slider = Slider(start=0, end=23, value=7, step=1, title="Hour", width=600)
show(slider)
Add RadioButton widgets for selecting (Pickups/Dropoffs) and the weekday:
from bokeh.models.widgets import RadioButtonGroup, Div
from bokeh.layouts import column, row
#Weekday selector; .active is the selected index (0=Mon ... 6=Sun):
radiobuttons_weekday = RadioButtonGroup(
    labels=["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"],
    active=0,
    width=400)
#Pickup/Dropoff selector (0=Pickups, 1=Dropoff):
radiobuttons_pudo = RadioButtonGroup(
    labels=["Pickups", "Dropoff"], active=0)
#The Div only serves as a horizontal spacer between the two button groups:
layout_widgets = column(slider, row(radiobuttons_weekday, Div(width=80), radiobuttons_pudo))
show(layout_widgets)
Add Interaction via JavaScript Callback:
from bokeh.models.callbacks import CustomJS
#Define callback-function with JavaScript code. Every object passed via `args`
#becomes a variable of the same name inside the JS snippet, so the callback
#can read the widget states and rewrite the plotted "Passengers" column:
callback = CustomJS(args=dict(p=p, source=source, slider=slider,
                              radiobuttons_pudo=radiobuttons_pudo,
                              radiobuttons_weekday=radiobuttons_weekday),
                    code="""
//Get value of slider for hour:
var hour = slider.value;
//Get value of weekday:
var weekday = radiobuttons_weekday.active;
//Get value of Pickups/Dropoffs RadioButtons:
if (radiobuttons_pudo.active == 0)
var pudo = "PU"
else
var pudo = "DO"
//Change data of "Passengers" column in data source to passenger data of the selected hour:
source.data["Passengers"] = source.data[pudo + "_Passenger_" + weekday + "_" + hour];
source.change.emit();
""")
#Bind Callback to value change of slider and radiobuttons:
slider.js_on_change("value", callback)
radiobuttons_pudo.js_on_change("active", callback)
radiobuttons_weekday.js_on_change("active", callback)
#Show widgets and map together:
layout = column( layout_widgets , p)
show(layout)
Change to logarithmic Colormapper (to see patterns in zones with low demand):
#Switch the choropleth to a logarithmic colour scale.
#NOTE(review): low=0 with a log mapper is suspicious (log(0) is undefined) --
#confirm how the installed Bokeh version renders zero-passenger zones.
color_mapper_log = LogColorMapper(palette=palette[::-1], high=max_passengers_per_hour, low=0)
patches.glyph.fill_color["transform"] = color_mapper_log
show(layout)
To clearly see the structure of the Taxi transport flow for each cell, let us look at the difference between Pickups and Dropoffs in a zone. If this is a positive number, more people leave the zone than enter it:
#Classify every zone/slot by the sign of (pickups - dropoffs).
df_pudo = df_taxizones.copy()
for hour in range(24):
    for weekday in range(7):
        #Difference Pickups - Dropoffs, then categorise its sign as a label:
        df_pudo["PUDO_%d_%d"%(weekday, hour)] = df_pudo["PU_Passenger_%d_%d"%(weekday, hour)] - df_pudo["DO_Passenger_%d_%d"%(weekday, hour)]
        df_pudo["PUDO_%d_%d"%(weekday, hour)] = df_pudo["PUDO_%d_%d"%(weekday, hour)].apply(lambda x: "PU > DO" if x>0 else "PU < DO")
#Drop the raw passenger-count columns. Pass a concrete list -- handing a lazy
#filter object to DataFrame.drop is fragile across pandas versions:
df_pudo.drop(columns=[c for c in df_pudo.columns if "Passenger" in c], inplace=True)
#Initial view: Monday 7:00.
df_pudo["PUDO"] = df_pudo["PUDO_0_7"]
df_pudo.head(3)
from bokeh.transform import factor_cmap
#Define Categorical Color Map for Plot (red="PU > DO", blue="PU < DO");
#factor_cmap maps the string factors of the "PUDO" column onto the palette:
categorical_cmap = factor_cmap("PUDO", palette=["red", "blue"], factors=["PU > DO", "PU < DO"] )
#Define Source for Plot:
source = ColumnDataSource(df_pudo)
#Define Figure for Plot (same layout as the choropleth figure above):
p = figure(title="Titel",
           plot_width=900, plot_height=450,
           toolbar_location=None,
           tools="pan,wheel_zoom,box_zoom,reset,save",
           active_scroll="wheel_zoom")
p.xaxis.visible = False
p.yaxis.visible = False
#Get rid of zoom on axes (isinstance is the idiomatic type check):
for t in p.tools:
    if isinstance(t, WheelZoomTool):
        t.zoom_on_axis = False
#Use OpenStreetMap Tiles:
tiles = WMTSTileSource(url='http://c.tile.openstreetmap.org/{Z}/{X}/{Y}.png')
#Add Tile Layer and set alpha-value:
tile_layer = p.add_tile(tiles)
tile_layer.alpha = 0.6
#Draw the zone polygons, coloured by the categorical PU/DO comparison.
#NOTE(review): `legend="PUDO"` is the legacy spelling; newer Bokeh versions
#expect `legend_field` -- confirm against the installed Bokeh release.
patches = p.patches(xs="X", ys="Y", source=source,
                    fill_color=categorical_cmap,
                    line_color="black", alpha=0.5,
                    legend="PUDO")
#Hour slider and weekday buttons for the categorical PU/DO map:
slider = Slider(start=0, end=23, value=7, step=1, title="Hour", width=350)
radiobuttons_weekday = RadioButtonGroup(
    labels=["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"],
    active=0,
    width=350)
#Define callback-function with JavaScript code; it swaps the plotted "PUDO"
#column for the one matching the selected weekday and hour:
callback = CustomJS(args=dict(p=p, source=source, slider=slider,
                              radiobuttons_weekday=radiobuttons_weekday),
                    code="""
//Get value of slider for hour:
var hour = slider.value;
//Get value of weekday:
var weekday = radiobuttons_weekday.active;
//Change data of "PUDO" column in data source to passenger data of the selected hour:
source.data["PUDO"] = source.data["PUDO_" + weekday + "_" + hour];
source.change.emit();
""")
#Bind Callback to value change of slider and radiobuttons:
slider.js_on_change("value", callback)
radiobuttons_weekday.js_on_change("active", callback)
#The Div acts as a spacer between slider and buttons:
show(column(row(slider, Div(width=100), radiobuttons_weekday), p))
Use the GPX dataset for Germany (only identifiable trips) from OpenStreetMap (© OpenStreetMap contributors). From the GPX files a final DataFrame (only containing latitude and longitude) was created. Let us load the data into the DataFrame:
Note: The OSM licence conditions also apply to the provided data.
#Forward slashes keep the path portable and avoid accidental backslash escape
#sequences (the original "Data\OSM GPX\..." was not a raw string):
df_osm = pd.read_parquet("Data/OSM GPX/OSM_GPX.parquet")
#Show the first points of two different trips side by side:
display_side_by_side(df_osm.query("TripId==1").head(), df_osm.query("TripId==2").head())
Calculate distances between the GPS points of a trip:
def calc_distance_from_lonlat(x1, y1, x2, y2):
    """Distance in km between two WGS84 points (lon/lat in degrees).

    Uses an equirectangular (flat-earth) approximation around the first
    point, which works well for smaller distances < 100km. Accepts scalars
    or NumPy/pandas arrays.
    """
    deg_to_km = 6371 * 2 * np.pi / 360  # km per degree of arc on the sphere
    dx = np.cos(np.radians(y1)) * (x2 - x1)  # shrink longitude span with latitude
    dy = y2 - y1
    return np.sqrt(dx ** 2 + dy ** 2) * deg_to_km
#Distance from every GPS point to the next row in the frame ("post length"):
next_lon = df_osm["longitude"].shift(-1)
next_lat = df_osm["latitude"].shift(-1)
df_osm["Postlength"] = calc_distance_from_lonlat(df_osm["longitude"], df_osm["latitude"], next_lon, next_lat)
#The last point of each trip has no successor within the same trip, so its
#Postlength is undefined (this also covers the very last row of the frame):
trip_boundary = df_osm["TripId"] - df_osm["TripId"].shift(-1) != 0
df_osm.loc[trip_boundary, "Postlength"] = np.NaN
display_side_by_side(df_osm.query("TripId==1").head(), df_osm.query("TripId==1").tail())
Let us quickly create some easy plots with basic statistics about the GPS data
Histogram with number of points per trip:
%%output filename="Export/Trip_points" fig="html"
%%opts Histogram [width=500 tools=['hover']]
%%opts Histogram (fill_color='red')
#Count Number of Points for every trip:
trip_points = df_osm.groupby("TripId")["latitude"].count().values
#Calculate 90 percent quantile for setting the plotting range:
quantile_90 = np.percentile(trip_points, 90)
#Histogram with 100-point-wide bins over the full range; the plot is written
#to Export/Trip_points.html via the %%output magic at the top of the cell:
frequencies, edges = np.histogram(trip_points, np.arange(0, trip_points.max()+100, 100))
p_trip_points = hv.Histogram((edges, frequencies), label="Number of GPS Points per Trip")
p_trip_points = p_trip_points.redim.label(x="Number of Points", Frequency="Trips")
#Clip the visible x-range to the 90% quantile (long tail stays in the data):
p_trip_points = p_trip_points.redim.range(x=(0,quantile_90))
p_trip_points
Trip length distribution
%%output filename="Export/Trip_lengths" fig="html"
%%opts Histogram [width=500 tools=['hover']]
%%opts Histogram (fill_color='red')
#Total length of every trip (sum of the per-point post lengths, in km):
trip_length = df_osm.groupby("TripId")["Postlength"].sum().values
#Calculate 90 percent quantile for setting the plotting range:
quantile_90 = np.percentile(trip_length, 90)
#Histogram with fixed 1 km bins from 0 to 1000 km; written to
#Export/Trip_lengths.html via the %%output magic at the top of the cell:
frequencies, edges = np.histogram(trip_length, np.arange(0, 1000, 1))
p_trip_length = hv.Histogram((edges, frequencies), label="Trip Distances")
p_trip_length = p_trip_length.redim.label(x="Length [km]", Frequency="Trips")
#Clip the visible x-range to the 90% quantile (long tail stays in the data):
p_trip_length = p_trip_length.redim.range(x=(0,quantile_90))
p_trip_length
Add coordinates in Web Mercator:
#Convert WGS84 lon/lat (EPSG:4326) to Web Mercator (EPSG:3857).
#The Transformer API replaces the deprecated Proj(init=...)/pyproj.transform
#combination (removed in pyproj 3); always_xy=True keeps the (lon, lat)
#argument order the old code relied on.
from pyproj import Transformer
transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)
#Add Web Mercator coordinates to columns X, Y:
df_osm["X"], df_osm["Y"] = transformer.transform(df_osm["longitude"].values, df_osm["latitude"].values)
df_osm.head()
Filter data with Germany Bounding-Box:
#Bounding boxes; swap in the Berlin line to zoom the analysis into the city.
minlat, maxlat, minlon, maxlon = 47.3, 55, 5.9, 15.1 #Bounding Box Germany
#maxlat, minlon, minlat, maxlon = 52.57, 13.25, 52.45, 13.5 #Bounding Box Berlin City
inside_box = ((df_osm["latitude"] > minlat) & (df_osm["latitude"] < maxlat) &
              (df_osm["longitude"] > minlon) & (df_osm["longitude"] < maxlon))
#copy() decouples the filtered frame from df_osm so later writes don't warn:
df_osm_filtered = df_osm[inside_box].copy()
Plot Heatmap of GPS Data using DataShader. Underlay heatmap with OSM map using GeoViews:
import holoviews as hv
import geoviews as gv
from colorcet import fire
from holoviews.operation.datashader import datashade
hv.extension('bokeh')
%%output filename="Export//Heatmap" fig="html"
#Define size of image:
width = 1000
height = 600
#Define sampling size of DataShader aggregation:
x_sampling = int((df_osm_filtered["X"].max()-df_osm_filtered["X"].min())/width)
y_sampling = int((df_osm_filtered["Y"].max()-df_osm_filtered["Y"].min())/height)
#Define DataShader Heatmap and Background Map Tiles:
tile_opts = dict(width=width,height=height,xaxis=None,yaxis=None,bgcolor='black',show_grid=False)
map_tiles = gv.WMTS('http://c.tile.openstreetmap.org/{Z}/{X}/{Y}.png').opts(style=dict(alpha=0.5), plot=tile_opts)
points = hv.Points(df_osm_filtered, ['X', 'Y'])
trips = datashade(points, x_sampling=1, y_sampling=1, cmap=fire, width=width, height=width)
#Combine DataShader Heatmap and Background Map Tiles:
p = map_tiles * trips
#Set wheelzoom active
def set_active_tool(plot, element):
plot.state.toolbar.active_scroll = plot.state.tools[3]
p = p.options(finalize_hooks=[set_active_tool])
p
The generated analysis and plots can be either put together like seen in the section about the New York Taxis zones, or one might use an HTML Template to embed the plots and data. Here, we use the Jinja package to easily embed the analysis. Let us first create some basic stats for an Overview Page:
#Basic statistics for the overview page, collected as key/value rows:
n_trips = len(df_osm["TripId"].unique())
overview_table = [
    dict(key="Source", value='<a href="https://wiki.openstreetmap.org/wiki/Planet.gpx">Open Street Map (Planet GPX)</a>'),
    dict(key="Minimum Latitude", value=df_osm["latitude"].min().round(5)),
    dict(key="Maximum Latitude", value=df_osm["latitude"].max().round(5)),
    dict(key="Minimum Longitude", value=df_osm["longitude"].min().round(5)),
    dict(key="Maximum Longitude", value=df_osm["longitude"].max().round(5)),
    dict(key="Number of Trips", value=n_trips),
    dict(key="Number of GPS Points", value=len(df_osm)),
    dict(key="Average number of GPS points per trip", value=len(df_osm)/n_trips),
    dict(key="Average distance per trip", value="%d km"%trip_length.mean()),
]
overview_table
Load Interactive Plots as HTML:
#Load the previously exported interactive plots as HTML strings. Pin the
#encoding instead of relying on the platform default (which is e.g. cp1252 on
#Windows); the final page is written as UTF-8 further below.
with open("Export/Trip_points.html", "r", encoding="utf-8") as f:
    p_trips = f.read()
with open("Export/Trip_lengths.html", "r", encoding="utf-8") as f:
    p_lengths = f.read()
with open("Export/Heatmap.html", "r", encoding="utf-8") as f:
    p_heatmap = f.read()
Load HTML Template via Jinja and embed Basic Statistics as a Table and the Interactive Plots:
#Load Template in 'Data/Template.html' via Jinja (read as UTF-8 for consistency
#with the other HTML files handled in this notebook):
from jinja2 import Template
with open('Data/Template.html', encoding="utf-8") as f:
    template = Template(f.read())
#Render template with our analysis and plots:
final_html = template.render(overview_table=overview_table, p_trips=p_trips,
                             p_lengths=p_lengths, p_heatmap=p_heatmap)
#Output final HTML in "Export/Overview.html"; the built-in open handles
#encodings in Python 3, so the codecs module is not needed:
with open("Export/Overview.html", "w", encoding="UTF-8") as f:
    f.write(final_html)
#Open Resulting HTML in the default browser:
import webbrowser, os
webbrowser.open(os.path.abspath("Export/Overview.html"));
Read in data from CSV
#Read the PTV office locations. The hover tooltip below references the columns
#Name, Country, City, Address, Telefone, Email, X, Y.
df_locations = pd.read_csv("Data/PTV_Locations.csv", sep=",")
#Replace missing values with empty strings, presumably so the hover tooltips
#don't render 'nan' -- verify against the CSV contents:
df_locations.fillna("", inplace=True)
df_locations.head()
#Bokeh imports for the PTV locations map:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.layouts import column
from bokeh.models import HoverTool, Select, ColumnDataSource, WMTSTileSource
from bokeh.models.callbacks import CustomJS
#STAMEN_TERRAIN is only referenced by the commented-out add_tile call below:
from bokeh.tile_providers import STAMEN_TERRAIN
from bokeh.models import WheelZoomTool
output_notebook()  # render Bokeh plots inline in the notebook
#Define Figure:
p = figure(title="PTV Locations",
           plot_width=700, plot_height=450,
           x_axis_label='x_axis',
           y_axis_label='y_axis',
           toolbar_location=None,
           tools="pan,wheel_zoom,box_zoom,reset,save",
           active_scroll="wheel_zoom")
#Hide the axes -- the tiled background map provides the spatial reference:
p.xaxis.visible = False
p.yaxis.visible = False
#Get rid of zoom on axes (isinstance is the idiomatic type check):
for t in p.tools:
    if isinstance(t, WheelZoomTool):
        t.zoom_on_axis = False
#Add Background Map (alternative: the Stamen terrain tiles imported above):
#p.add_tile(STAMEN_TERRAIN)
tile = WMTSTileSource(url='https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}')
p.add_tile(tile)
#Define ColumnDataSource:
source = ColumnDataSource(df_locations)
#Plot PTV Locations on World Map (X/Y columns hold Web Mercator coordinates
#-- NOTE(review): confirm against the CSV):
ax = p.scatter(x="X", y="Y", source=source,
               size=10, fill_color="red", line_color="black")
#Add Hover Tool, attached below the hovered glyph:
my_hover = HoverTool(attachment="below")
#CSS for the tooltip table. Fixed the invalid declaration "width = 250px;"
#(CSS uses a colon, so the rule was silently ignored by browsers).
#NOTE(review): the "#hover ex" selector targets the nonstandard element 'ex'
#-- confirm whether it is used anywhere or can be removed.
css_style = """<style>
#hover {
font-family: "Trebuchet MS", Arial, Helvetica, sans-serif;
border-collapse: collapse;
width: 250px;
}
#hover td, #hover th {
border: 1px solid #ddd;
text-align: center;
padding: 5px;
width:125px;
}
#hover tr:nth-child(even){background-color: #f2f2f2;}
#hover th {
padding-top: 5px;
padding-bottom: 5px;
text-align: center;
background-color: #b50000;
color: white;
width:125px;
}
#hover ex {
padding-top: 5px;
padding-bottom: 5px;
text-align: center;
background-color: #b50000;
color: black;
width:125px;
}
</style>"""
#Tooltip body: @Column references are substituted with the hovered row's data.
my_hover.tooltips = css_style + """
<div style="width:500px"><h2>@Name <img style="margin-right: 35px;float:right;" src="https://www.xing.com/img/custom/cp/assets/logo/b/4/7/11079/square_128px/PTV_Group_10cm_farbig20140304-11381-15uhov8.jpg" alt="" /> </h2> </div>
<h3>@Country </h3>
<h4>@City, @Address </h4>
<h5>Telefone: @Telefone </h5>
<h5>Email: <a href=@Email> @Email </a> </h5>
"""
p.add_tools(my_hover)
show(p)
Export components to create HTML content that can be embedded into another DIV:
#Get components of plot (script + div that can be embedded in any page):
from bokeh.embed import components
script, div = components(p)
#Define sources for the Bokeh CSS and JS, pinned to the installed version so
#the embedded plot matches the library that produced it.
#NOTE(review): cdn.pydata.org is the legacy Bokeh CDN host -- verify it still
#resolves, or switch the URLs to cdn.bokeh.org for current releases.
import bokeh
version = bokeh.__version__
source = """<link
href="https://cdn.pydata.org/bokeh/release/bokeh-<release>.min.css"
rel="stylesheet" type="text/css">
<link
href="https://cdn.pydata.org/bokeh/release/bokeh-widgets-<release>.min.css"
rel="stylesheet" type="text/css">
<link
href="https://cdn.pydata.org/bokeh/release/bokeh-tables-<release>.min.css"
rel="stylesheet" type="text/css">
<script src="https://cdn.pydata.org/bokeh/release/bokeh-<release>.min.js"></script>
<script src="https://cdn.pydata.org/bokeh/release/bokeh-widgets-<release>.min.js"></script>
<script src="https://cdn.pydata.org/bokeh/release/bokeh-tables-<release>.min.js"></script>""".replace("<release>", version)
#Export final HTML content; write as UTF-8 explicitly instead of relying on
#the platform default encoding (the path also had a stray double slash):
html_content = source + script + div
with open("Export/PTV_Locations.html", "w", encoding="utf-8") as f:
    f.write(html_content)